home *** CD-ROM | disk | FTP | other *** search
/ Amiga Format CD 52 / Amiga Format AFCD52 (Issue 136, May 2000).iso / -serious- / programming / other / jikes-1.11 / src / gencode.java < prev    next >
Text File  |  2000-02-23  |  18KB  |  399 lines

  1. import java.lang.Integer;
  2. import java.lang.Character;
  3. import java.io.PrintStream;
  4. import java.io.FileOutputStream;
  5.  
  6. class gencode
  7. {
  8.     static final int NEWLINE_CODE      = 1; // \n, \r
  9.     static final int SPACE_CODE        = 2; // \t, \v, \f, ' '
  10.     static final int BAD_CODE          = 3; // everything not covered by other codes ...
  11.     static final int DIGIT_CODE        = 4; // '0'..'9'                    
  12.     static final int OTHER_DIGIT_CODE  = 5; // all unicode digits
  13.     static final int LOWER_CODE        = 6; // 'a'..'z'
  14.     static final int UPPER_CODE        = 7; // 'A'..'Z'
  15.     static final int OTHER_LETTER_CODE = 8; // '$', '_', all other unicode letters
  16.  
  17.     static final int LOG_BASE_SIZE       = 9; // must be a value between 0..16
  18.     static final int LOG_COMPLEMENT_SIZE = (16 - LOG_BASE_SIZE);
  19.     static final int BASE_SIZE           = (1 << LOG_BASE_SIZE);
  20.     static final int SLOT_SIZE           = (1 << LOG_COMPLEMENT_SIZE);
  21.     static final int SLOT_MASK           = (SLOT_SIZE - 1);
  22.  
  23.     static final int BaseIndex(int i) { return i >> LOG_COMPLEMENT_SIZE; }
  24.     static final int DataIndex(int i) { return i & SLOT_MASK; }
  25.  
  26.     static public void main(String args[]) throws java.io.FileNotFoundException, java.io.IOException
  27.     {
  28.         int num_elements = 65536,
  29.             num_slots = BASE_SIZE;
  30.  
  31.         byte base[][] = new byte[BASE_SIZE + 1][SLOT_SIZE];
  32.         if (LOG_BASE_SIZE > 0 && LOG_BASE_SIZE < 16)
  33.         {
  34.             for (int i = 0; i < SLOT_SIZE; i++)
  35.                 base[BASE_SIZE][i] = BAD_CODE;
  36.             num_slots++;
  37.         }
  38.  
  39.         for (int i = 0; i < 65536; i++)
  40.         {
  41.             char a = (char) i;
  42.  
  43.             if (a == '\n' || a == '\r')
  44.                  base[BaseIndex(i)][DataIndex(i)] = NEWLINE_CODE;
  45.             else if (a==' ' || a=='\t' || a=='\f')
  46.                  base[BaseIndex(i)][DataIndex(i)] = SPACE_CODE;
  47.             else if (a < 128 && Character.isLowerCase(a)) // Ascii lower case
  48.                  base[BaseIndex(i)][DataIndex(i)] = LOWER_CODE;
  49.             else if (a < 128 && Character.isUpperCase(a)) // Ascii upper case
  50.                  base[BaseIndex(i)][DataIndex(i)] = UPPER_CODE;
  51.             else if (a < 128 && Character.isDigit(a)) // Ascii digit
  52.                  base[BaseIndex(i)][DataIndex(i)] = DIGIT_CODE;
  53.             else if (Character.isJavaIdentifierStart(a))
  54.                  base[BaseIndex(i)][DataIndex(i)] = OTHER_LETTER_CODE;
  55.             else if (Character.isJavaIdentifierPart(a))
  56.                  base[BaseIndex(i)][DataIndex(i)] = OTHER_DIGIT_CODE;
  57.             else
  58.             {
  59.                  base[BaseIndex(i)][DataIndex(i)] = BAD_CODE;
  60.                  num_elements--;
  61.             }
  62.         }
  63.  
  64.         for (int i = 0; i < BASE_SIZE; i++)
  65.         {
  66.             int k;
  67.             for (k = 0; k < SLOT_SIZE; k++)
  68.             {
  69.                 if (base[i][k] != BAD_CODE)
  70.                     break;
  71.             }
  72.  
  73.             if (k == SLOT_SIZE)
  74.             {
  75.                 base[i] = base[BASE_SIZE];
  76.                 num_slots--;
  77.             }
  78.         }
  79.  
  80.         //
  81.         // Process the code.h file
  82.         //
  83.         PrintStream hfile = new PrintStream(new FileOutputStream("code.h"));
  84.  
  85.         //
  86.         // Process the code.cpp file
  87.         //
  88.         PrintStream cfile = new PrintStream(new FileOutputStream("code.cpp"));
  89.  
  90.         if (LOG_BASE_SIZE > 0 && LOG_BASE_SIZE < 16)
  91.         {
  92.             hfile.println("#ifndef code_INCLUDED");
  93.             hfile.println("#define code_INCLUDED");
  94.             hfile.println();
  95.             hfile.println("#include \"config.h\"");
  96.             hfile.println("#include <ctype.h>");
  97.             hfile.println("#include <assert.h>");
  98.             hfile.println();
  99.             hfile.println("class Code");
  100.             hfile.println("{");
  101.             hfile.println("    //");
  102.             hfile.println("    // To facilitate the scanning, the character set is partitioned into");
  103.             hfile.println("    // 8 classes using the array CODE. The classes are described below");
  104.             hfile.println("    // together with some self-explanatory functions defined on CODE.");
  105.             hfile.println("    //");
  106.             hfile.println("    enum {");
  107.  
  108.             hfile.println("             LOG_BASE_SIZE       = " + LOG_BASE_SIZE + ',');
  109.             hfile.println("             LOG_COMPLEMENT_SIZE = " + LOG_COMPLEMENT_SIZE + ',');
  110.             hfile.println("             BASE_SIZE           = " + BASE_SIZE + ',');
  111.             hfile.println("             SLOT_SIZE           = " + SLOT_SIZE + ',');
  112.             hfile.println("             SLOT_MASK           = " + SLOT_MASK + ',');
  113.             hfile.println();
  114.             hfile.println("             NEWLINE_CODE        = " + NEWLINE_CODE + ',');
  115.             hfile.println("             SPACE_CODE          = " + SPACE_CODE + ',');
  116.             hfile.println("             BAD_CODE            = " + BAD_CODE + ',');
  117.             hfile.println("             DIGIT_CODE          = " + DIGIT_CODE + ',');
  118.             hfile.println("             OTHER_DIGIT_CODE    = " + OTHER_DIGIT_CODE + ',');
  119.             hfile.println("             LOWER_CODE          = " + LOWER_CODE + ',');
  120.             hfile.println("             UPPER_CODE          = " + UPPER_CODE + ',');
  121.             hfile.println("             OTHER_LETTER_CODE   = " + OTHER_LETTER_CODE);
  122.             hfile.println("         };");
  123.             hfile.println();
  124.             hfile.println("    static char code[" + num_slots * SLOT_SIZE + "];");
  125.             hfile.println("    static char *base[" +  BASE_SIZE + "];");
  126.             hfile.println();
  127.             hfile.println();
  128.             hfile.println("public:");
  129.             hfile.println();
  130.             hfile.println("    static inline void SetBadCode(wchar_t c)");
  131.             hfile.println("    {");
  132.             hfile.println("        base[c >> LOG_COMPLEMENT_SIZE][c] = BAD_CODE;");
  133.             hfile.println("    }");
  134.             hfile.println();
  135.             hfile.println("    static inline void CodeCheck(wchar_t c)");
  136.             hfile.println("    {");
  137.             hfile.println("         assert(c >> LOG_COMPLEMENT_SIZE < BASE_SIZE);");
  138.             hfile.println("         assert(base[c >> LOG_COMPLEMENT_SIZE] + c >= (&code[0]));");
  139.             hfile.println("         assert(base[c >> LOG_COMPLEMENT_SIZE] + c < (&code[" + num_slots * SLOT_SIZE + "]));");
  140.             hfile.println("    }");
  141.             hfile.println();
  142.             hfile.println("    static inline bool IsNewline(wchar_t c) // \\r characters are replaced by \\x0a in read_input.");
  143.             hfile.println("    {");
  144.             hfile.println("        return c == '\\x0a';");
  145.             hfile.println("    }");
  146.             hfile.println();
  147.             hfile.println("    static inline bool IsSpaceButNotNewline(wchar_t c)");
  148.             hfile.println("    {");
  149.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == SPACE_CODE;");
  150.             hfile.println("    }");
  151.             hfile.println();
  152.             hfile.println("    static inline bool IsSpace(wchar_t c)");
  153.             hfile.println("    {");
  154.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] <= SPACE_CODE;");
  155.             hfile.println("    }");
  156.             hfile.println();
  157.             hfile.println("    static inline bool IsDigit(wchar_t c)");
  158.             hfile.println("    {");
  159.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == DIGIT_CODE;");
  160.             hfile.println("    }");
  161.             hfile.println();
  162.             hfile.println("    static inline bool IsUpper(wchar_t c)");
  163.             hfile.println("    {");
  164.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == UPPER_CODE;");
  165.             hfile.println("    }");
  166.             hfile.println();
  167.             hfile.println("    static inline bool IsLower(wchar_t c)");
  168.             hfile.println("    {");
  169.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == LOWER_CODE;");
  170.             hfile.println("    }");
  171.             hfile.println();
  172.             hfile.println("    static inline bool IsAlpha(wchar_t c)");
  173.             hfile.println("    {");
  174.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] >= LOWER_CODE;");
  175.             hfile.println("    }");
  176.             hfile.println();
  177.             hfile.println("    static inline bool IsAlnum(wchar_t c)");
  178.             hfile.println("    {");
  179.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] >= DIGIT_CODE;");
  180.             hfile.println("    }");
  181.             hfile.println();
  182.             hfile.println();
  183.             hfile.println("};");
  184.             hfile.println();
  185.             hfile.println("#endif");
  186.  
  187.             cfile.println("#include \"code.h\"");
  188.             cfile.println();
  189.             cfile.println("char Code::code[" +  num_slots * SLOT_SIZE + "] =");
  190.             cfile.println("{");
  191.  
  192.             int base_index[] = new int[BASE_SIZE + 1],
  193.                 num = 0;
  194.  
  195.             for (int j = 0; j <= BASE_SIZE; j++)
  196.             {
  197.                 cfile.println("    //");
  198.                 cfile.println("    // Slot " + j  + ":");
  199.                 cfile.println("    //");
  200.  
  201.                 byte slot[] = base[j];
  202.                 if (j != BASE_SIZE && slot == base[BASE_SIZE])
  203.                 {
  204.                     base_index[j] = -1;
  205.                 }
  206.                 else
  207.                 {
  208.                     base_index[j] = num;
  209.                     num += SLOT_SIZE;
  210.                     for (int k = 0; k < SLOT_SIZE; k += 4)
  211.                     {
  212.                         for (int l = 0; l < 4; l++)
  213.                         {
  214.                             cfile.print(l == 0 ? "    " : " ");
  215.                             switch(slot[k + l])
  216.                             {
  217.                                 case NEWLINE_CODE:
  218.                                      cfile.print("NEWLINE_CODE,");
  219.                                      break;
  220.                                 case SPACE_CODE:
  221.                                      cfile.print("SPACE_CODE,");
  222.                                      break;
  223.                                 case BAD_CODE:
  224.                                      cfile.print("BAD_CODE,");
  225.                                      break;
  226.                                 case DIGIT_CODE:
  227.                                      cfile.print("DIGIT_CODE,");
  228.                                      break;
  229.                                 case OTHER_DIGIT_CODE:
  230.                                      cfile.print("OTHER_DIGIT_CODE,");
  231.                                      break;
  232.                                 case LOWER_CODE:
  233.                                      cfile.print("LOWER_CODE,");
  234.                                      break;
  235.                                 case UPPER_CODE:
  236.                                      cfile.print("UPPER_CODE,");
  237.                                      break;
  238.                                 default:
  239.                                      cfile.print("OTHER_LETTER_CODE,");
  240.                                      break;
  241.                             }
  242.                         }
  243.                         cfile.println();
  244.                     }
  245.                 }
  246.  
  247.                 cfile.println();
  248.             }
  249.  
  250.             cfile.println("};");
  251.  
  252.             cfile.println();
  253.             cfile.println();
  254.             cfile.println("//");
  255.             cfile.println("// The Base vector:");
  256.             cfile.println("//");
  257.             cfile.println("char *Code::base[" + BASE_SIZE + "] =");
  258.             cfile.println("{");
  259.             for (int k = 0; k < BASE_SIZE; k += 4)
  260.             {
  261.                 for (int i = 0; i < 4; i++)
  262.                 {
  263.                     int j = k + i;
  264.                     cfile.print(i == 0 ? "   " : " ");
  265.                     cfile.print(" &code[" + 
  266.                                 (base_index[j] >= 0 ? base_index[j] : base_index[BASE_SIZE]) +
  267.                                 "] - " +
  268.                                 (j * SLOT_SIZE) +
  269.                                   ",");
  270.                 }
  271.                 cfile.println();
  272.             }
  273.             cfile.println("};");
  274.  
  275.             //
  276.             // Print Statistics
  277.             //
  278.             System.out.println(" The number of slots used is " + num_slots);
  279.             System.out.println(" Total static storage utilization is " +
  280.                                num_slots * SLOT_SIZE + " bytes for encoding plus " +
  281.                                BASE_SIZE * 4 + " bytes for the base");
  282.             System.out.println(" The number of unicode characters is " + num_elements);
  283.             System.out.println(" Total static storage utilization is 65536");
  284.         }
  285.         else
  286.         {
  287.             hfile.println("#ifndef code_INCLUDED");
  288.             hfile.println("#define code_INCLUDED");
  289.             hfile.println();
  290.             hfile.println("#include \"config.h\"");
  291.             hfile.println("#include <ctype.h>");
  292.             hfile.println("#include \"bool.h\"");
  293.             hfile.println();
  294.             hfile.println("class Code");
  295.             hfile.println("{");
  296.             hfile.println("    //");
  297.             hfile.println("    // To facilitate the scanning, the character set is partitioned into");
  298.             hfile.println("    // 8 classes using the array CODE. The classes are described below");
  299.             hfile.println("    // together with some self-explanatory functions defined on CODE.");
  300.             hfile.println("    //");
  301.             hfile.println("    enum {");
  302.             hfile.println("             NEWLINE_CODE      = " + NEWLINE_CODE + ",");
  303.             hfile.println("             SPACE_CODE        = " + SPACE_CODE + ",");
  304.             hfile.println("             BAD_CODE          = " + BAD_CODE + ",");
  305.             hfile.println("             DIGIT_CODE        = " + DIGIT_CODE + ",");
  306.             hfile.println("             OTHER_DIGIT_CODE  = " + OTHER_DIGIT_CODE + ",");
  307.             hfile.println("             LOWER_CODE        = " + LOWER_CODE + ",");
  308.             hfile.println("             UPPER_CODE        = " + UPPER_CODE + ",");
  309.             hfile.println("             OTHER_LETTER_CODE = " + OTHER_LETTER_CODE);
  310.             hfile.println("         };");
  311.             hfile.println();
  312.             hfile.println("    static char code[65536];");
  313.             hfile.println();
  314.             hfile.println();
  315.             hfile.println("public:");
  316.             hfile.println();
  317.             hfile.println("    //");
  318.             hfile.println("    // \\r characters are replaced by \\x0a in read_input.");
  319.             hfile.println("    //");
  320.             hfile.println("    static inline bool IsNewline(wchar_t c)            { return c == '\\x0a'; }");
  321.             hfile.println("    static inline bool IsSpaceButNotNewline(wchar_t c) { return code[c] == SPACE_CODE; }");
  322.             hfile.println("    static inline bool IsSpace(wchar_t c)              { return code[c] <= SPACE_CODE; }");
  323.             hfile.println("    static inline bool IsDigit(wchar_t c)              { return code[c] == DIGIT_CODE; }");
  324.             hfile.println("    static inline bool IsUpper(wchar_t c)              { return code[c] == UPPER_CODE; }");
  325.             hfile.println("    static inline bool IsLower(wchar_t c)              { return code[c] == LOWER_CODE; }");
  326.             hfile.println("    static inline bool IsAlpha(wchar_t c)              { return code[c] >= LOWER_CODE; }");
  327.             hfile.println("    static inline bool IsAlnum(wchar_t c)              { return code[c] >= DIGIT_CODE; }");
  328.             hfile.println();
  329.             hfile.println("};");
  330.             hfile.println();
  331.             hfile.println("#endif");
  332.  
  333.             cfile.println("#include \"code.h\"");
  334.             cfile.println();
  335.             cfile.println("char Code::code[65536] =");
  336.             cfile.println("{");
  337.  
  338.             int k = 0;
  339.             for (int i = 0; i < 65536; i += 256)
  340.             {
  341.                 cfile.println("    //");
  342.                 cfile.println("    // Slot " + i + ":");
  343.                 cfile.println("    //");
  344.  
  345.                 for (int j = 0; j < 256; j += 4)
  346.                 {
  347.                     for (int l = 0; l < 4; l++)
  348.                     {
  349.                         byte b = base[BaseIndex(k)][DataIndex(k)];
  350.                         k++;
  351.                         cfile.print(l == 0 ? "    " : " ");
  352.                         switch(b)
  353.                         {
  354.                             case NEWLINE_CODE:
  355.                                  cfile.print("NEWLINE_CODE,");
  356.                                  break;
  357.                             case SPACE_CODE:
  358.                                  cfile.print("SPACE_CODE,");
  359.                                  break;
  360.                             case BAD_CODE:
  361.                                  cfile.print("BAD_CODE,");
  362.                                  break;
  363.                             case DIGIT_CODE:
  364.                                  cfile.print("DIGIT_CODE,");
  365.                                  break;
  366.                             case OTHER_DIGIT_CODE:
  367.                                  cfile.print("OTHER_DIGIT_CODE,");
  368.                                  break;
  369.                             case LOWER_CODE:
  370.                                  cfile.print("LOWER_CODE,");
  371.                                  break;
  372.                             case UPPER_CODE:
  373.                                  cfile.print("UPPER_CODE,");
  374.                                  break;
  375.                             default:
  376.                                  cfile.print("OTHER_LETTER_CODE,");
  377.                                  break;
  378.                         }
  379.                     }
  380.                     cfile.println();
  381.                 }
  382.  
  383.                 cfile.println();
  384.             }
  385.  
  386.             cfile.println("};");
  387.  
  388.             //
  389.             // Print Statistics
  390.             //
  391.             System.out.println(" The number of unicode letters is " + num_elements);
  392.             System.out.println(" Total static storage utilization is 65536");
  393.         }
  394.  
  395.         hfile.close();
  396.         cfile.close();
  397.     }
  398. }
  399.